{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# Visualization\n",
    "\n",
    "可能要分成多个图\n",
    "- 按照version绘图，表示出标记正确的version和标记错误的version以及占比\n",
    "- 按照API绘图，以API的链接作为区分，表示出正确和错误的占比\n",
    "- 分别以可否访问endpoint 和 portal/homepage 作为API是否存活的依据，绘制上面两个图"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import json\n",
    "from crawler.data_objects import SimplifiedApi, VersionVisit, VisitStatus"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Load the version_status dump files (version_status0915-1.txt .. -5.txt)\n",
    "# and rebuild each JSON record as a VersionVisit object.\n",
    "version_visits = []\n",
    "for i in range(1, 6):\n",
    "    with open('version_status0915-' + str(i) + '.txt', 'r', encoding='utf-8') as file:\n",
    "        version_visits_dicts = json.load(fp=file)\n",
    "    # Show how many records this file contributed. (The previous code printed\n",
    "    # len() of a string that was never filled in, so it always printed 0.)\n",
    "    print(len(version_visits_dicts))\n",
    "    # Convert the parsed JSON dicts into data objects\n",
    "    for visit_dict in version_visits_dicts:\n",
    "        from_api_dict = visit_dict['from_api']\n",
    "        from_api = SimplifiedApi(from_api_dict['api_title'], from_api_dict['url'])\n",
    "        visit_status_list = [\n",
    "            VisitStatus(status['visit_url'], status['visit_label'], status['status_code'])\n",
    "            for status in visit_dict['visit_status']\n",
    "        ]\n",
    "        # NOTE(review): 'is_endpoint_accessbile' (sic) is the key read here;\n",
    "        # presumably it matches the spelling used in the dump files - verify before renaming.\n",
    "        version_visits.append(VersionVisit(\n",
    "            visit_dict['version_title'],\n",
    "            visit_dict['version_url'],\n",
    "            from_api,\n",
    "            visit_status_list,\n",
    "            visit_dict['is_accessible'],\n",
    "            visit_dict['is_endpoint_accessbile'],\n",
    "            visit_dict['is_homepage_accessible'],\n",
    "        ))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Sanity check: total number of VersionVisit records collected in version_visits.\n",
    "len(version_visits)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "def _draw_pie_pair(suptitle, total, endpoint_count, homepage_count):\n",
    "    \"\"\"Draw two side-by-side accessible/inaccessible pies, one per criterion.\"\"\"\n",
    "    if total <= 0:\n",
    "        # An all-zero pie has no meaningful proportions, so skip the figure.\n",
    "        print('No data for: ' + suptitle)\n",
    "        return\n",
    "    fig, axes = plt.subplots(1, 2)\n",
    "    fig.suptitle(suptitle)\n",
    "    panels = (('API Endpoint', endpoint_count), ('API Portal / Home Page', homepage_count))\n",
    "    for ax, (title, accessible) in zip(axes, panels):\n",
    "        ax.set_title(title)\n",
    "        ax.pie(x=[accessible, total - accessible], labels=['Accessible', 'Inaccessible'], autopct='%.2f%%')\n",
    "    plt.show()\n",
    "\n",
    "\n",
    "def show_pie(version_visits, extra_api_count=1951):\n",
    "    \"\"\"Plot accessibility pie charts per version and per API, then print API totals.\n",
    "\n",
    "    Two figures are produced, each with one pie for 'API Endpoint' and one for\n",
    "    'API Portal / Home Page':\n",
    "      1. share of versions that have an accessible URL of that kind;\n",
    "      2. share of APIs (keyed by the stripped from_api.url) with at least one\n",
    "         such version.\n",
    "    A status counts as accessible when its status_code is 200 or 429.\n",
    "\n",
    "    Args:\n",
    "        version_visits: iterable of objects exposing from_api.url and\n",
    "            visit_status (items with visit_label and status_code).\n",
    "        extra_api_count: number added to the count of distinct API URLs to get\n",
    "            the API total. Defaults to 1951, the value that used to be\n",
    "            hard-coded here - presumably APIs that do not appear in\n",
    "            version_visits; TODO confirm where this figure comes from.\n",
    "    \"\"\"\n",
    "    accessible_codes = (200, 429)\n",
    "    apis = set()\n",
    "    endpoint_accessible_versions = []\n",
    "    endpoint_accessible_apis = set()\n",
    "    homepage_accessible_versions = []\n",
    "    homepage_accessible_apis = set()\n",
    "    for visit in version_visits:\n",
    "        from_api_url = visit.from_api.url.strip()\n",
    "        apis.add(from_api_url)\n",
    "\n",
    "        # Decide once per version, so a version with several accessible URLs of\n",
    "        # the same kind is not counted several times (which could make the\n",
    "        # 'Inaccessible' wedge negative).\n",
    "        endpoint_ok = False\n",
    "        homepage_ok = False\n",
    "        for status in visit.visit_status:\n",
    "            if status.status_code not in accessible_codes:\n",
    "                continue\n",
    "            if status.visit_label == 'API Endpoint':\n",
    "                endpoint_ok = True\n",
    "            elif status.visit_label == 'API Portal / Home Page':\n",
    "                homepage_ok = True\n",
    "\n",
    "        if endpoint_ok:\n",
    "            endpoint_accessible_versions.append(visit)\n",
    "            # One accessible version is enough to mark the whole API accessible\n",
    "            endpoint_accessible_apis.add(from_api_url)\n",
    "        if homepage_ok:\n",
    "            homepage_accessible_versions.append(visit)\n",
    "            homepage_accessible_apis.add(from_api_url)\n",
    "\n",
    "    # Totals used as the denominators of the pies\n",
    "    version_length = len(version_visits)\n",
    "    api_length = len(apis) + extra_api_count\n",
    "    endpoint_api_length = len(endpoint_accessible_apis)\n",
    "    homepage_api_length = len(homepage_accessible_apis)\n",
    "\n",
    "    _draw_pie_pair(\"Active versions' accessibility\", version_length,\n",
    "                   len(endpoint_accessible_versions), len(homepage_accessible_versions))\n",
    "    _draw_pie_pair(\"Active APIs' accessibility\", api_length,\n",
    "                   endpoint_api_length, homepage_api_length)\n",
    "\n",
    "    print('Judged by API Endpoint, ' + str(endpoint_api_length) + ' of ' + str(api_length) + ' in all are active')\n",
    "    print('Judged by API Homepage, ' + str(homepage_api_length) + ' of ' + str(api_length) + ' in all are active')\n",
    "\n",
    "show_pie(version_visits)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}